# Retrieve the number of divergent transitions from a Stan fit.
GetNDivergencies <- function(stan_fit) {
  n_divergent <- stan_fit %>%
    nuts_params() %>%
    filter(Parameter == "divergent__") %>%
    pull(Value) %>%
    sum()
  return(n_divergent)
}

# Plot the marginal posterior distributions of sigma (top row) and of the
# mu- and nu-related parameters (bottom row) for a fitted model.
plotMarginals <- function(stan_fit_obj) {
  p_mu <- stan_fit_obj %>%
    mcmc_areas(regex_pars = "mu\\[|mu_mean|mu_sd|^mu$")
  p_nu <- stan_fit_obj %>%
    mcmc_areas(regex_pars = "nu\\[|nu_mean|nu_sd|^nu$")
  p_sigma <- stan_fit_obj %>%
    mcmc_areas(regex_pars = "sigma")
  p_bottom <- plot_grid(p_mu, p_nu, nrow = 1)
  plot_grid(p_sigma, p_bottom, ncol = 1, rel_heights = c(1, 5))
}

# Plot the posterior predictive (filtered) degradation paths for every unit,
# overlaid on the true paths and the noisy observations.
# `data` must be the tidy CrackGrowth data frame with columns
# `cycles`, `value`, `name` and `noisy_value`.
plotReclaimedPaths <- function(stan_fit_obj, data, title) {
  stan_fit_obj %>%
    as_draws_rvars() %>%
    spread_rvars(zplot[i, n]) %>%
    arrange(i, n) %>%
    mutate(
      t = data$cycles, # variable name `cycles` hard-coded
      true_path = data$value,
      unit = factor(data$name),
      noisy_obs = data$noisy_value
    ) %>%
    # Re-order unit names numerically so Unit 1 is not followed by Unit 10
    mutate(
      order = str_extract(unit, "\\d+") %>% as.integer(),
      unit = fct_reorder(unit, order)
    ) %>%
    ggplot(aes(x = t, dist = zplot, col = unit, group = unit, fill = unit)) +
    stat_dist_lineribbon(.width = c(0.95), alpha = 0.2) +
    geom_line(aes(y = true_path)) +
    geom_point(aes(y = noisy_obs)) +
    xlim(0, NA) +
    facet_wrap(vars(unit), nrow = 2) +
    theme(legend.position = "none") +
    labs(
      y = "estimated true degradation",
      x = "Hundreds of thousands of cycles"
    ) +
    ggtitle(title)
}

# Version of pgamma capable of handling rvar objects.
RvarPGamma <- rfun(pgamma)

# Probability that a unit has failed by time t, i.e. that the gamma-process
# increment over (t_max_obs, t] exceeds failure_threshold - y_max_obs.
# mu and nu may be rvar objects (posterior draws).
RvarFailureTimeFunction <- function(t, y_max_obs, t_max_obs,
                                    failure_threshold, mu, nu) {
  # The guard permits t == t_max_obs, so the message must say "or equal to".
  if (any(t < t_max_obs)) {
    stop("All values of t must be greater than or equal to t_max_obs")
  }
  # Minimum jump in degradation needed for failure
  f_delta_y <- failure_threshold - y_max_obs
  # P(delta y > minimum jump | time step), under the gamma-process
  # parameterisation shape = dt / nu^2, rate = 1 / (mu * nu^2)
  prob_failure <- RvarPGamma(
    f_delta_y,
    shape = (t - t_max_obs) / (nu^2),
    rate = 1 / (mu * (nu^2)),
    lower.tail = FALSE
  )
  return(prob_failure)
}

# generate posterior predictive
# simulations: draw and plot n simulated degradation paths for N_units units,
# given n x N_units matrices of (mu, nu) draws. Used for both prior and
# posterior predictive checks; callers invoke it as PlotPriorPredictiveCheck
# (the leading "simulations" fragment belonged to the preceding comment).
PlotPriorPredictiveCheck <- function(n, N_units = 10, mu_ppc, nu_ppc) {
  if (!((nrow(mu_ppc) == n) & (nrow(nu_ppc) == n))) {
    stop("The matrix for each parameter should have n rows.")
  }
  if (!((ncol(mu_ppc) == N_units) & (ncol(nu_ppc) == N_units))) {
    stop("The matrix for each parameter should have N_units columns.")
  }
  # Nine jumps per path -> 10 points including t = 0; the time grid below has
  # step 0.1, which matches the hard-coded shape increment 0.1 / nu^2.
  N_obs <- 9
  z_itrs_ppc <- lapply(
    1:n,
    function(itr) {
      z_itr <- lapply(
        1:N_units,
        function(j) {
          jumps <- rgamma(
            N_obs,
            shape = (0.1 / nu_ppc[itr, j]^2),
            rate = (1 / (nu_ppc[itr, j]^2 * mu_ppc[itr, j]))
          )
          z <- c(0, cumsum(jumps))
          return(z)
        }
      ) %>% unlist()
    }
  ) %>% unlist()
  df <- data.frame(
    itr = factor(rep(1:n, each = ((N_obs + 1) * N_units))),
    unit = factor(rep(rep(1:N_units, each = N_obs + 1), n)),
    t = rep(seq(0, 0.9, length.out = (N_obs + 1)), (N_units * n)),
    z = z_itrs_ppc
  )
  p <- df %>%
    ggplot(
      aes(
        x = t,
        y = z,
        colour = itr,
        group = interaction(itr, unit),
        linetype = itr
      )
    ) +
    geom_line() +
    xlim(0, 1)
  return(p)
}
1 Introduction
This document contains the supplementary material for Section 4 (Fitting a noisy gamma process) of the manuscript Leadbetter, R.L, Gonzalez Caceres, G. J. and Phatak, A. (2024) Fitting noisy gamma processes using RStan, adapted to produce the figures and tables of Chap. 5: Noisy gamma process with unit-to-unit variability in the Thesis.
Here, we demonstrate how the basic noisy gamma process model can be expanded to analyse multiple units and how unit-to-unit variability can be included in the model as a random effect (partial pooling). We fit four models: Complete pooling, which assumes all units arise from the same shared gamma process;
Partial pooling on \(\mu\), which assumes that each unit arises from different gamma processes that have the same coefficient of variation (volatility) and different, but similar, mean degradation rates;
Partial pooling on \(\nu\), which similarly assumes that each unit arises from different gamma processes, but instead, they all have the same mean degradation rate and individual coefficients of variation;
and Partial pooling on \(\mu\) and \(\nu\), which assumes that all units arise from different gamma processes whose mean wear rate and coefficient of variation are similar but not the same.
To showcase these four different models, we use the crack growth data published in Rodriguez-Picon et al. [2018], which contains the degradation pathways, each from different units and each consisting of ten degradation observations (crack length) taken at evenly spaced time intervals (expressed in hundreds of thousands of loading cycles); Figure 1 (a). In the example, a unit is considered to have failed once its crack length has exceeded \(0.4\). We add Gaussian noise to the degradation measurements in the crack growth dataset to simulate measurement error. Figure 1 (b) shows the noisy observations as dashed lines and the true underlying degradation pathways as solid lines.
# Tidy data and then ggplot2; re-order unit names so Unit 1 is not followed
# by Unit 10; use cowplot to simplify theme
CrackGrowth %>%
  pivot_longer(-cycles) %>%
  mutate(
    order = str_extract(name, "\\d+") %>% as.integer(),
    name = fct_reorder(name, order)
  ) %>%
  ggplot(aes(x = cycles, y = value, col = name)) +
  geom_line() +
  labs(y = "cumulative degradation", x = "Hundreds of thousands of cycles") +
  labs(color = NULL) +
  theme_minimal_grid(12)

# Pivot to tidy (long) format and add Gaussian noise to the measurements.
# The first 10 long-format rows are the t = 0 observations (one per unit),
# which are left noise-free. The original code first created
# `noisy_value = value` and immediately overwrote it; the redundant step is
# merged into a single mutate here.
set.seed(9485873)
SD <- 0.025
CrackGrowth <- CrackGrowth %>%
  pivot_longer(-cycles) %>%
  mutate(
    noisy_value = value +
      c(rep(0, 10), rnorm(length(value) - 10, mean = 0, sd = SD))
  )
# Reflect any negative noisy measurements back above zero
# (a crack length cannot be negative)
CrackGrowth$noisy_value[CrackGrowth$noisy_value < 0] <- abs(
  CrackGrowth$noisy_value[CrackGrowth$noisy_value < 0]
)
p_noisy_crackgrowth <- CrackGrowth %>%
  mutate(
    order = str_extract(name, "\\d+") %>% as.integer(),
    name = fct_reorder(name, order)
  ) %>%
  ggplot(aes(x = cycles, y = noisy_value, col = name)) +
  geom_line(linetype = "dashed") +
  geom_line(aes(y = value, col = name)) +
  labs(y = "cumulative degradation", x = "Hundreds of thousands of cycles") +
  labs(color = NULL) +
  ylim(0, 0.6) +
  theme_minimal_grid(12)
p_noisy_crackgrowth
(a) The raw data from Rodriguez-Picon et al. [2018].
(b) The raw data, shown as solid lines, and noisy data, shown as dashed lines.
Figure 1: The crack growth dataset.
In order to use this data to fit degradation models in Stan, the data needs to be arranged in a list. Stan also uses hard coding, so we need to define the lengths of the different data objects.
# Format data for Stan.
# After pivot_longer the long data is ordered cycle-by-cycle (all units for
# cycle 1, then all units for cycle 2, ...), so fill the matrix row-wise:
# one row per observation time, one column per unit.
noisy_data_mat <- matrix(
  CrackGrowth$noisy_value,
  ncol = length(unique(CrackGrowth$name)),
  nrow = length(unique(CrackGrowth$cycles)),
  byrow = TRUE
)
# (A redundant self-assignment `noisy_data_mat <- noisy_data_mat` was removed.)
stan_data <- list(
  I = nrow(noisy_data_mat), # number of observation times
  N = ncol(noisy_data_mat), # number of units
  t = unique(CrackGrowth$cycles),
  y = noisy_data_mat
)
Next, we fit four different noisy GP models to the crack growth data using Stan.
In the following sections, we define each of the models in Stan code, sample from the posterior, visualise the marginal posteriors of the parameters and the posterior predictive distributions of the true underlying degradation pathways, and plot the posterior predictive distribution for the failure time of a new unit and for unit three, which is under test but not failed. In the final sections, X and Y, we evaluate and compare each of the four different models’ abilities to predict a new unit’s degradation and the future degradation of the units currently under test by using cross-fold \(elppd\) methods (described in the main body of the paper).
where \(j\) is the unit and \(i\) is the observation. In Stan code this is:
We use the no-U-turn sampler in Stan to sample \(6000\) draws from the posterior, using six chains, each \(2000\) draws long with a burn-in of \(1000\). We also set the sampling parameters adapt_delta and max_treedepth to \(0.99\) and \(14\) respectively to make sure that we get a detailed picture of the posterior in order to check for any bad behaviour during sampling.
Figure 2 (a) shows the marginal distributions of the parameters \(\mu\), \(\nu\), and \(\sigma\) and Table 1 shows the sampling chain diagnostics from the sampling. The complete pooling model has done a good job of reclaiming the true value of \(\sigma\) (\(0.025\)). There also does not appear to be any degenerate behaviour in the posteriors of the parameters (Figure 2).
Table 1: The sampler outputs for the complete pooling model.
Parameter
mean
2.5%
50%
97.5%
n_eff
Rhat
sigma
0.03
0.02
0.03
0.04
2003
1.00
mu
0.38
0.33
0.38
0.44
7889
1.00
nu
0.21
0.15
0.21
0.29
652
1.01
# Marginal posterior distributions for the complete-pooling fit
plotMarginals(stan_fit_cp)

# Two-way joint posterior distributions (pairs plot), with NUTS diagnostics
# so divergent transitions are highlighted.
# NOTE(review): the object is named p_pairs_pp_mu although this is the
# complete-pooling model; the name is kept in case it is referenced later.
np_cp <- nuts_params(stan_fit_cp)
p_pairs_pp_mu <- mcmc_pairs(
  stan_fit_cp,
  pars = c("sigma", "mu", "nu"),
  np = np_cp
)
p_pairs_pp_mu
(a) The marginal posterior distributions of the parameters.
(b) The joint posterior distributions of the parameters.
Figure 2: The complete pooling model’s posterior.
Since we are not working with simulated data, we do not know the true underlying data-generating process for the degradation paths. Therefore, to understand if the model sufficiently describes the data, we must look at the posterior distribution of the underlying (non-noisy) degradation paths. Figure 3 shows the posterior distribution for the underlying degradation paths as well as the true degradation path of each unit and the noisy degradation measurements.
Figure 3: The posterior distributions of the filtered degradation path for each unit using the complete-pooling model.
In each of the plots in Figure 3, the model has done a decent job of reclaiming the true degradation paths of each unit from the noisy data. For the most part, the true paths sit within the \(95\%\) credible intervals. Now that we are satisfied that the model has reasonably fit the data, we can derive failure time probabilities and other useful quantities.
To calculate the failure time distribution (with uncertainty intervals) for new units using the posterior of the complete pooling model, we simply use the posterior draws of the parameters \(\mu\) and \(\nu\) to calculate the probability that a new unit will have exceeded the soft failure threshold of \(0.4\) at exposure time \(t\).
# Extract posterior draws as rvars
stan_fit_rvar_cp <- as_draws_rvars(stan_fit_cp)
# Random-variable objects for mu and nu
mu_cp <- stan_fit_rvar_cp$mu
nu_cp <- stan_fit_rvar_cp$nu
# Grid of times at which to evaluate the failure probability
ft <- seq(0, 3, length.out = 100)
# Failure probability at each time on the grid
ft_dist_cp <- lapply(
  ft,
  function(tt) {
    ft_vect <- RvarFailureTimeFunction(
      tt,
      y_max_obs = 0,           # new unit in "new condition"
      t_max_obs = 0,           # starts at t = 0
      failure_threshold = 0.4, # failed when crack length > 0.4
      mu = mu_cp,
      nu = nu_cp
    )
    # Flatten the draws into a single rvar for plotting
    ft_cp <- ft_vect %>%
      draws_of() %>%
      as.vector() %>%
      rvar()
    data.frame(t = tt, density = ft_cp)
  }
)
ft_dist_cp <- bind_rows(ft_dist_cp)
# Plot as ribbon plot
ft_dist_cp <- ft_dist_cp %>%
  ggplot(aes(x = t, dist = density)) +
  stat_lineribbon(.width = c(.50, .80)) +
  scale_fill_brewer() +
  xlim(0, 2) +
  labs(y = "probability", x = "Hundreds of thousands of cycles") +
  theme_minimal_grid(12)
ft_dist_cp
Figure 4: The failure time distribution of new units under the complete pooling model.
Compared to the FT distribution of the varying \(\mu\) model below, this has much less uncertainty. Perhaps that’s not surprising because there is one fewer source of uncertainty; nevertheless, it is surprisingly precise.
Similarly, we can calculate the failure time distribution for a unit currently under test. For example, Unit 3 has a true degradation of \(0.262\) at \(t = 0.9\), so we can generate a failure time distribution for Unit 3 conditioned on its current age and degradation level. Since in the complete pooling case we have assumed that all units share the same parameter values, deriving the failure time distribution for Unit 3 is very similar to the new unit case above, except now we are calculating the probability of the jump in degradation exceeding \(0.4 - z_{9,3}\) for times \(t > 0.9\).
# Current age and filtered degradation level of Unit 3
t_current <- max(stan_data$t)
post_z_3_I <- stan_fit_rvar_cp$z[9, 3]
# Grid of times from the current age onwards
ft <- seq(t_current, 3, length.out = 100)
# Failure probability at each time on the grid
unit3_ft_dist_cp <- lapply(
  ft,
  function(tt) {
    ft_vect <- RvarFailureTimeFunction(
      tt,
      y_max_obs = post_z_3_I,  # current estimated degradation of Unit 3
      t_max_obs = t_current,   # current age of Unit 3
      failure_threshold = 0.4, # failed when crack length > 0.4
      mu = mu_cp,
      nu = nu_cp
    )
    ft_cp <- ft_vect %>%
      draws_of() %>%
      as.vector() %>%
      rvar()
    data.frame(t = tt, density = ft_cp)
  }
)
unit3_ft_dist_cp <- bind_rows(unit3_ft_dist_cp)
unit3_ft_dist_cp <- unit3_ft_dist_cp %>%
  ggplot(aes(x = t, dist = density)) +
  stat_lineribbon(.width = c(.50, .80)) +
  scale_fill_brewer() +
  xlim(0, 2) +
  labs(y = "probability", x = "Hundreds of thousands of cycles") +
  theme_minimal_grid(12)
unit3_ft_dist_cp
Figure 5: The failure time distribution of Unit 3 under the complete pooling model.
# Extract posterior draws of mu and nu
ppc_post <- as_draws_df(stan_fit_cp) %>%
  spread_draws(mu, nu)
# Sample 3 (mu, nu) pairs from the posterior draws
N_itr <- 3
N_units <- 10
sample_rows <- sample(1:nrow(ppc_post), size = N_itr)
ppc_post <- ppc_post[sample_rows, ]
# Complete pooling: every unit shares the same draw, so each sampled value
# is recycled across all N_units columns.
mu_ppc <- matrix(ppc_post$mu, nrow = N_itr, ncol = N_units)
nu_ppc <- matrix(ppc_post$nu, nrow = N_itr, ncol = N_units)
# Generate posterior predictive simulations
p_ppc_cp <- PlotPriorPredictiveCheck(
  n = N_itr,
  N_units = N_units,
  mu_ppc,
  nu_ppc
)
p_ppc_cp
3 Varying \(\mu\)
In the varying \(\mu\) model, we assume the units’ degradation paths arise from different underlying gamma processes, each with the same \(\nu\) and different but similar \(\mu\). To do this, we “pool” information between unit-specific \(\mu_j\) by assuming that they arise from a common distribution:
functions {
  // Draw from a normal(a, b) distribution truncated below at zero.
  real normal_trunc_rng(real a, real b) {
    real p_lb = normal_cdf(0, a, b);
    real u = uniform_rng(p_lb, 1);
    real y = a + b * inv_Phi(u);
    return y;
  }
}
data {
  int I;           // number of observations
  int N;           // number of units
  vector[I] t;     // the set of observation times
  matrix[I, N] y;  // the set of noisy measurements
}
transformed data {
  vector[I-1] t_diff;  // the differences of the observation times
  // t_diff[1] = t[1];
  t_diff = t[2:I] - t[1:I-1];
}
parameters {
  real<lower = 0> sigma;    // sd of measurement error
  real<lower = 0> mu_mean;  // mean hyper parameter of the mu_n
  real<lower = 0> mu_sd;    // sd hyper parameter of the mu_n
  vector<lower = ((0 - mu_mean) / mu_sd)>[N] mu_nc;  // non-centred unit means
  real<lower = 0> nu_a;
  real<lower = 0> nu_b;
  matrix<lower = 0>[I-1, N] z_diff;  // the jumps
}
transformed parameters {
  vector<lower = 0>[N] mu;
  real<lower = 0> nu;           // coefficient of variation (shared)
  matrix<lower = 0>[I-1, N] z;  // filtered observations
  for (n in 1:N) {
    mu[n] = (mu_nc[n] * mu_sd) + mu_mean;
  }
  nu = nu_a / sqrt(nu_b);
  // jumps
  for (n in 1:N) {
    z[, n] = cumulative_sum(z_diff[, n] * mu[n]);
  }
}
model {
  // priors //
  // Unit-specific means mu_n are partially pooled through a common
  // truncated-normal prior with hyper parameters mu_mean and mu_sd;
  // the coefficient of variation nu is shared by all units.
  mu_mean ~ normal(0.5, 0.2);
  mu_sd ~ cauchy(0, 1);
  mu_nc ~ normal(0, 1);
  nu_a ~ normal(0, 1);
  nu_b ~ gamma((0.5 * 3), (0.5 * 3));
  sigma ~ uniform(0, 10);
  // process model //
  // z_diff are standardised jumps; they are scaled by mu[n] above.
  for (n in 1:N) {
    z_diff[, n] ~ gamma(t_diff / pow(nu, 2), 1 / (pow(nu, 2)));
  }
  // data model //
  for (n in 1:N) {
    y[2:I, n] ~ normal(z[, n], sigma);
  }
}
generated quantities {
  matrix[I, N] zplot;
  matrix[I, N] y_pred;
  real mu_ppd;
  real nu_ppd;
  vector[N] mu_i_ppd;
  vector[N] nu_i_ppd;
  // generate posterior predictive samples
  for (n in 1:N) {
    y_pred[1, n] = 0;
    zplot[1, n] = 0;
    for (i in 2:I) {
      y_pred[i, n] = normal_rng(z[i-1, n], sigma);
      zplot[i, n] = z[i-1, n];
    }
  }
  // parameter PPDs for model comparison
  mu_ppd = normal_trunc_rng(mu_mean, mu_sd);
  nu_ppd = nu;
  for (n in 1:N) {
    mu_i_ppd[n] = mu[n];
    nu_i_ppd[n] = nu;
  }
}
Once again, we sample \(6000\) draws from the posterior, using six chains, each \(2000\) draws long with a burn-in of \(1000\). Now, some divergent transitions crop up with the extra hierarchical layer in the model. However, there are very few divergences, and there doesn’t appear to be any structure in their locations, Figure 6. From Table 2 and Figure 8, we can comfortably say that this model is able to reclaim the true value of the standard deviation of the measurement error to the same extent as the complete pooling model in the previous section.
Figure 9: The posterior distributions of the filtered degradation path for each unit under the varying mu model.
Once again, we can derive the failure time distribution and uncertainty for new units and units under test. Figure 10 (a) shows the failure time distribution for new units under the varying \(\mu\) model. Since the model assumes that each unit has a specific \(\mu_j\), to predict the failure time of new units, we first have to sample posterior predictive values of \(\tilde{\mu}_{J + 1}\) from the hierarchical prior \(\hbox{N}^{+}(\mu_\mu, \sigma_\mu)\) using the posterior draws of \(\mu_\mu\) and \(\sigma_\mu\). Using the posterior draws of \(\nu\) and the posterior predictive draw of \(\tilde{\mu}_{J + 1}\), we calculate the failure probabilities in the same way as we did for the complete pooling case in Figure 4. To generate the failure time distribution of a unit under test, we use the draws of \(\nu\) and the unit-specific parameter \(\mu_j\) to calculate the failure probability for \(t > 0.9\). Figure 10 (b) shows the current failure time distribution for Unit 3.
## New unit
# Draws from posterior
stan_fit_rvar <- as_draws_rvars(stan_fit_pp_mu)
mu_mean <- stan_fit_rvar$mu_mean
mu_sd <- stan_fit_rvar$mu_sd
nu <- stan_fit_rvar$nu
# Posterior predictive distribution of mu for a new unit, drawn from the
# hierarchical prior (abs() enforces the truncation at zero)
set.seed(123)
RvarRNorm <- rfun(rnorm)
mu_ppd <- abs(RvarRNorm(100, mu_mean, mu_sd))
# Grid of times
ft <- seq(0, 3, length.out = 100)
# Failure probability at each time on the grid
ft_dist <- lapply(
  ft,
  function(tt) {
    ft_vect <- RvarFailureTimeFunction(
      tt,
      y_max_obs = 0,
      t_max_obs = 0,
      failure_threshold = 0.4,
      mu = mu_ppd,
      nu = nu
    )
    ft <- ft_vect %>%
      draws_of() %>%
      as.vector() %>%
      rvar()
    data.frame(t = tt, density = ft)
  }
)
ft_dist <- bind_rows(ft_dist)
# Plot as ribbon plot
ft_dist_pp_mu <- ft_dist %>%
  ggplot(aes(x = t, dist = density)) +
  stat_lineribbon(.width = c(.50, .80)) +
  scale_fill_brewer() +
  xlim(0, 2) +
  labs(y = "probability", x = "Hundreds of thousands of cycles") +
  theme_minimal_grid(12)
ft_dist_pp_mu

## Unit 3
# Current age, degradation level, and unit-specific mu of Unit 3
t_current <- max(stan_data$t)
post_z_3_I_pp_mu <- stan_fit_rvar$z[9, 3]
mu_3_pp_mu <- stan_fit_rvar$mu[3]
# Grid of times from the current age onwards
ft <- seq(t_current, 3, length.out = 100)
# Failure probability at each time on the grid
unit3_ft_dist_pp_mu <- lapply(
  ft,
  function(tt) {
    ft_vect <- RvarFailureTimeFunction(
      tt,
      y_max_obs = post_z_3_I_pp_mu, # current estimated degradation of Unit 3
      t_max_obs = t_current,        # current age of Unit 3
      failure_threshold = 0.4,      # failed when crack length > 0.4
      mu = mu_3_pp_mu,              # unit-specific mean for Unit 3
      nu = nu
    )
    ft_cp <- ft_vect %>%
      draws_of() %>%
      as.vector() %>%
      rvar()
    data.frame(t = tt, density = ft_cp)
  }
)
unit3_ft_dist_pp_mu <- bind_rows(unit3_ft_dist_pp_mu)
unit3_ft_dist_pp_mu <- unit3_ft_dist_pp_mu %>%
  ggplot(aes(x = t, dist = density)) +
  stat_lineribbon(.width = c(.50, .80)) +
  scale_fill_brewer() +
  xlim(0, 2) +
  labs(y = "probability", x = "Hundreds of thousands of cycles") +
  theme_minimal_grid(12)
unit3_ft_dist_pp_mu
(a) The failure time distribution of new units under the varying mu model.
(b) The failure time distribution of Unit 3 under the varying mu model.
Figure 10: Varying mu model failure time distributions.
# Extract posterior draws of the hyper parameters and nu
ppc_post <- as_draws_df(stan_fit_pp_mu) %>%
  spread_draws(mu_mean, mu_sd, nu)
# Sample 3 sets of hyper parameters from the posterior draws
N_itr <- 3
N_units <- 10
sample_rows <- sample(1:nrow(ppc_post), size = N_itr)
ppc_post <- ppc_post[sample_rows, ]
# Draw unit-specific mu from the hierarchical prior. The model's prior is
# truncated at zero, so fold negative rnorm() draws back with abs() — this
# matches the abs() used for the failure-time PPD above and prevents a
# negative mu from giving rgamma() an invalid rate inside
# PlotPriorPredictiveCheck.
mu_ppc <- matrix(
  abs(rnorm(N_itr * N_units, ppc_post$mu_mean, ppc_post$mu_sd)),
  nrow = N_itr,
  ncol = N_units
)
nu_ppc <- matrix(ppc_post$nu, nrow = N_itr, ncol = N_units)
# Generate posterior predictive simulations
p_ppc_pp_mu <- PlotPriorPredictiveCheck(
  n = N_itr,
  N_units = N_units,
  mu_ppc,
  nu_ppc
)
p_ppc_pp_mu
4 Varying \(\nu\)
In the varying \(\nu\) model, we instead assume that the underlying gamma processes for each unit have the same mean wear rate but unit-specific coefficients of variation. We specify the model with the same hierarchical structure for \(\nu\) as we used for \(\mu\) in the previous model
functions {
  // Draw from a normal(a, b) distribution truncated below at zero.
  real normal_trunc_rng(real a, real b) {
    real p_lb = normal_cdf(0, a, b);
    real u = uniform_rng(p_lb, 1);
    real y = a + b * inv_Phi(u);
    return y;
  }
}
data {
  int I;           // number of observations
  int N;           // number of units
  vector[I] t;     // the set of observation times
  matrix[I, N] y;  // the set of noisy measurements
}
transformed data {
  vector[I-1] t_diff;  // the differences of the observation times
  // t_diff[1] = t[1];
  t_diff = t[2:I] - t[1:I-1];
}
parameters {
  real<lower = 0> sigma;  // sd of measurement error
  real<lower = 0> mu;     // the mean of the gamma process (shared)
  real<lower = 0> nu_a;
  real<lower = 0> nu_b;
  real<lower = 0> nu_sd;  // sd hyper parameter of the nu_n
  vector<lower = ((0 - (nu_a / sqrt(nu_b))) / nu_sd)>[N] nu_nc;
  matrix<lower = 0>[I-1, N] z_diff;  // the jumps
}
transformed parameters {
  real<lower = 0> nu_mean;  // mean hyper parameter of the nu_n
  vector<lower = 0>[N] nu;
  matrix<lower = 0>[I-1, N] z;  // filtered observations
  nu_mean = nu_a / sqrt(nu_b);
  for (n in 1:N)
    nu[n] = (nu_nc[n] * nu_sd) + nu_mean;
  // jumps
  for (n in 1:N) {
    z[, n] = cumulative_sum(z_diff[, n] * mu);
  }
}
model {
  // priors //
  // Unit-specific coefficients of variation nu_n are partially pooled
  // through a common truncated-normal prior with hyper parameters
  // nu_mean and nu_sd; the mean wear rate mu is shared by all units.
  mu ~ normal(0.5, 0.2);
  nu_a ~ normal(0, 1);
  nu_b ~ gamma((0.5 * 3), (0.5 * 3));
  nu_sd ~ cauchy(0, 0.1); //10);
  nu_nc ~ normal(0, 1); //(nu_sd / 10));
  sigma ~ uniform(0, 10);
  // process model //
  for (n in 1:N) {
    z_diff[, n] ~ gamma(t_diff / pow(nu[n], 2), 1 / (pow(nu[n], 2)));
  }
  // data model //
  for (n in 1:N) {
    y[2:I, n] ~ normal(z[, n], sigma);
  }
}
generated quantities {
  matrix[I, N] zplot;
  matrix[I, N] y_pred;
  real mu_ppd;
  real nu_ppd;
  vector[N] mu_i_ppd;
  vector[N] nu_i_ppd;
  // generate posterior predictive samples
  for (n in 1:N) {
    y_pred[1, n] = 0;
    zplot[1, n] = 0;
    for (i in 2:I) {
      y_pred[i, n] = normal_rng(z[i-1, n], sigma);
      zplot[i, n] = z[i-1, n];
    }
  }
  // parameter PPDs for model comparison
  mu_ppd = mu;
  nu_ppd = normal_trunc_rng(nu_mean, nu_sd);
  for (n in 1:N) {
    mu_i_ppd[n] = mu;
    nu_i_ppd[n] = nu[n];
  }
}
We once again generate \(6000\) draws from the posterior using Stan. The sampling for the varying \(\nu\) model is much more poorly behaved than the previous two models, with 120 divergent transitions.
Table 3: The sampler outputs for the varying nu model.
Parameter
mean
2.5%
50%
97.5%
n_eff
Rhat
sigma
0.03
0.02
0.03
0.04
1856
1.00
nu[1]
0.21
0.10
0.21
0.32
735
1.00
nu[2]
0.22
0.14
0.22
0.33
910
1.01
nu[3]
0.22
0.13
0.22
0.33
755
1.00
nu[4]
0.22
0.13
0.22
0.33
811
1.00
mu
0.38
0.32
0.37
0.44
2598
1.00
nu_mean
0.22
0.15
0.22
0.31
598
1.00
nu_sd
0.03
0.00
0.03
0.10
284
1.02
The pairs plots of the posterior draws are shown in Figure 11, with the divergent transitions shown in red. In the lower triangle of the pairs plot, the divergent transitions clearly arise when the \(\nu_j\) draws are very close to one another and hence very close to \(\mu_\nu\) and \(\sigma_\nu\) is very close to zero. In the marginal plots in Figure 12, there is very little evidence of a varying \(\nu\). Nevertheless, the model is able to reclaim the value of \(\sigma\) and the underlying degradation pathways, as shown in Figure 14.
Figure 13: The parallel coordinate plot of the MCMC draws for the varying nu model.
# Figure 14: posterior distributions of the filtered degradation paths under
# the varying-nu model, overlaid on the true paths and noisy observations.
plotReclaimedPaths( stan_fit_pp_nu,data = CrackGrowth,"Varying coefficient of variation")
Figure 14: The posterior distributions of the filtered degradation path for each unit under the varying nu model.
The failure time distribution.
## New unit
# Draws from posterior
stan_fit_rvar <- as_draws_rvars(stan_fit_pp_nu)
nu_mean <- stan_fit_rvar$nu_mean
nu_sd <- stan_fit_rvar$nu_sd
mu <- stan_fit_rvar$mu
# Posterior predictive distribution of nu for a new unit, drawn from the
# hierarchical prior (abs() enforces the truncation at zero)
set.seed(123)
RvarRNorm <- rfun(rnorm)
nu_ppd <- abs(RvarRNorm(100, nu_mean, nu_sd))
# Grid of times
ft <- seq(0, 3, length.out = 100)
# Failure probability at each time on the grid
ft_dist <- lapply(
  ft,
  function(tt) {
    ft_vect <- RvarFailureTimeFunction(
      tt,
      y_max_obs = 0,
      t_max_obs = 0,
      failure_threshold = 0.4,
      mu = mu,
      nu = nu_ppd
    )
    ft <- ft_vect %>%
      draws_of() %>%
      as.vector() %>%
      rvar()
    data.frame(t = tt, density = ft)
  }
)
ft_dist <- bind_rows(ft_dist)
# Plot as ribbon plot
ft_dist %>%
  ggplot(aes(x = t, dist = density)) +
  stat_lineribbon(.width = c(.50, .80)) +
  scale_fill_brewer() +
  xlim(0, 2) +
  labs(y = "probability", x = "Hundreds of thousands of cycles") +
  theme_minimal_grid(12)

## Unit 3
# Current age, degradation level, and unit-specific nu of Unit 3
t_current <- max(stan_data$t)
post_z_3_I_pp_nu <- stan_fit_rvar$z[9, 3]
nu_3_pp_nu <- stan_fit_rvar$nu[3]
# Grid of times from the current age onwards
ft <- seq(t_current, 3, length.out = 100)
# Failure probability at each time on the grid
unit3_ft_dist_pp_nu <- lapply(
  ft,
  function(tt) {
    ft_vect <- RvarFailureTimeFunction(
      tt,
      y_max_obs = post_z_3_I_pp_nu, # current estimated degradation of Unit 3
      t_max_obs = t_current,        # current age of Unit 3
      failure_threshold = 0.4,      # failed when crack length > 0.4
      mu = mu,
      nu = nu_3_pp_nu               # unit-specific CoV for Unit 3
    )
    ft_cp <- ft_vect %>%
      draws_of() %>%
      as.vector() %>%
      rvar()
    data.frame(t = tt, density = ft_cp)
  }
)
unit3_ft_dist_pp_nu <- bind_rows(unit3_ft_dist_pp_nu)
unit3_ft_dist_pp_nu <- unit3_ft_dist_pp_nu %>%
  ggplot(aes(x = t, dist = density)) +
  stat_lineribbon(.width = c(.50, .80)) +
  scale_fill_brewer() +
  xlim(0, 2) +
  labs(y = "probability", x = "Hundreds of thousands of cycles") +
  theme_minimal_grid(12)
unit3_ft_dist_pp_nu
(a) The failure time distribution of new units under the varying nu model.
(b) The failure time distribution of Unit 3 under the varying nu model.
Figure 15: Varying nu failure time distributions.
# Extract posterior draws of mu and the nu hyper parameters
ppc_post <- as_draws_df(stan_fit_pp_nu) %>%
  spread_draws(mu, nu_mean, nu_sd)
# Sample 3 sets of parameters from the posterior draws
N_itr <- 3
N_units <- 10
sample_rows <- sample(1:nrow(ppc_post), size = N_itr)
ppc_post <- ppc_post[sample_rows, ]
mu_ppc <- matrix(ppc_post$mu, nrow = N_itr, ncol = N_units)
# Draw unit-specific nu from the hierarchical prior. The model's prior is
# truncated at zero, so fold negative rnorm() draws back with abs() — this
# matches the abs() used for the failure-time PPD above and prevents a
# negative nu from producing invalid gamma parameters inside
# PlotPriorPredictiveCheck.
nu_ppc <- matrix(
  abs(rnorm(N_itr * N_units, ppc_post$nu_mean, ppc_post$nu_sd)),
  nrow = N_itr,
  ncol = N_units
)
# Generate posterior predictive simulations
p_ppc_pp_nu <- PlotPriorPredictiveCheck(
  n = N_itr,
  N_units = N_units,
  mu_ppc,
  nu_ppc
)
p_ppc_pp_nu
5 Partial pooling on both mu and nu
The final model includes partial pooling of both \(\mu\) and \(\nu\). It uses the same hyper priors as we use in the two models above, Section 3 and Section 4,
The model with varying \(\mu\) and \(\nu\) suffers from the same sampling issues as the varying \(\nu\) model but much worse. However, the model still does a decent job at reclaiming the true parameter value of \(\sigma\), and it appears to fit the same unit-specific values of \(\mu\) as the varying \(\mu\) model in Section 3.
Figure 16: The pairs plots of the MCMC draws for the varying mu and nu model.
# Figure 17: marginal posterior distributions for the varying-mu-and-nu model.
plotMarginals(stan_fit_pp_both)
Figure 17: The marginal posterior distributions of the parameters for the varying mu and nu model.
Similar to all the other models, this model is able to reclaim the true degradation pathways from the noisy observations.
# Figure 18: posterior distributions of the filtered degradation paths under
# the varying-mu-and-nu model, overlaid on the true paths and noisy data.
plotReclaimedPaths( stan_fit_pp_both,data = CrackGrowth,"Varying mean and coefficient of variation")
Figure 18: The posterior distributions of the filtered degradation path for each unit under the varying mu and nu model.
## New unit
# Draws from posterior
stan_fit_rvar <- as_draws_rvars(stan_fit_pp_both)
mu_mean <- stan_fit_rvar$mu_mean
mu_sd <- stan_fit_rvar$mu_sd
nu_mean <- stan_fit_rvar$nu_mean
nu_sd <- stan_fit_rvar$nu_sd
# Posterior predictive distributions of mu and nu for a new unit, drawn from
# the hierarchical priors (abs() enforces the truncation at zero)
set.seed(123)
RvarRNorm <- rfun(rnorm)
mu_ppd <- abs(RvarRNorm(100, mu_mean, mu_sd))
nu_ppd <- abs(RvarRNorm(100, nu_mean, nu_sd))
# Grid of times
ft <- seq(0, 3, length.out = 100)
# Failure probability at each time on the grid
ft_dist <- lapply(
  ft,
  function(tt) {
    ft_vect <- RvarFailureTimeFunction(
      tt,
      y_max_obs = 0,
      t_max_obs = 0,
      failure_threshold = 0.4,
      mu = mu_ppd,
      nu = nu_ppd
    )
    ft <- ft_vect %>%
      draws_of() %>%
      as.vector() %>%
      rvar()
    data.frame(t = tt, density = ft)
  }
)
ft_dist <- bind_rows(ft_dist)
# Plot as ribbon plot
ft_dist %>%
  ggplot(aes(x = t, dist = density)) +
  stat_lineribbon(.width = c(.50, .80)) +
  scale_fill_brewer() +
  xlim(0, 2) +
  labs(y = "probability", x = "Hundreds of thousands of cycles") +
  theme_minimal_grid(12)

## Unit 3
# Current age, degradation level, and unit-specific mu and nu of Unit 3
t_current <- max(stan_data$t)
post_z_3_I_pp_both <- stan_fit_rvar$z[9, 3]
mu_3_pp_both <- stan_fit_rvar$mu[3]
nu_3_pp_both <- stan_fit_rvar$nu[3]
# Grid of times from the current age onwards
ft <- seq(t_current, 3, length.out = 100)
# Failure probability at each time on the grid
unit3_ft_dist_pp_both <- lapply(
  ft,
  function(tt) {
    ft_vect <- RvarFailureTimeFunction(
      tt,
      y_max_obs = post_z_3_I_pp_both, # current estimated degradation of Unit 3
      t_max_obs = t_current,          # current age of Unit 3
      failure_threshold = 0.4,        # failed when crack length > 0.4
      mu = mu_3_pp_both,              # unit-specific mean for Unit 3
      nu = nu_3_pp_both               # unit-specific CoV for Unit 3
    )
    ft_cp <- ft_vect %>%
      draws_of() %>%
      as.vector() %>%
      rvar()
    data.frame(t = tt, density = ft_cp)
  }
)
unit3_ft_dist_pp_both <- bind_rows(unit3_ft_dist_pp_both)
unit3_ft_dist_pp_both <- unit3_ft_dist_pp_both %>%
  ggplot(aes(x = t, dist = density)) +
  stat_lineribbon(.width = c(.50, .80)) +
  scale_fill_brewer() +
  xlim(0, 2) +
  labs(y = "probability", x = "Hundreds of thousands of cycles") +
  theme_minimal_grid(12)
unit3_ft_dist_pp_both
(a) The failure time distribution of new units under the varying mu and nu model.
(b) The failure time distribution of Unit 3 under the varying mu and nu model.
Figure 19: Varying mu and nu failure time distributions.
# Extract posterior draws of the mu and nu hyper parameters
ppc_post <- as_draws_df(stan_fit_pp_both) %>%
  spread_draws(mu_mean, mu_sd, nu_mean, nu_sd)
# Sample 3 sets of hyper parameters from the posterior draws
N_itr <- 3
N_units <- 10
sample_rows <- sample(1:nrow(ppc_post), size = N_itr)
ppc_post <- ppc_post[sample_rows, ]
# Draw unit-specific mu and nu from the hierarchical priors. Both priors are
# truncated at zero in the model, so fold negative rnorm() draws back with
# abs() — consistent with the failure-time PPD code above, and preventing
# negative parameters from producing invalid gamma draws inside
# PlotPriorPredictiveCheck.
mu_ppc <- matrix(
  abs(rnorm(N_itr * N_units, ppc_post$mu_mean, ppc_post$mu_sd)),
  nrow = N_itr,
  ncol = N_units
)
nu_ppc <- matrix(
  abs(rnorm(N_itr * N_units, ppc_post$nu_mean, ppc_post$nu_sd)),
  nrow = N_itr,
  ncol = N_units
)
# Generate posterior predictive simulations
p_ppc_pp_both <- PlotPriorPredictiveCheck(
  n = N_itr,
  N_units = N_units,
  mu_ppc,
  nu_ppc
)
p_ppc_pp_both
# Tabulate the number of divergent transitions for each of the four fits and
# save the table as LaTeX.
data.frame(
  model = c(
    "complete pooling",
    "partial pooling mu",
    "partial pooling nu",
    "partial pooling mu and nu"
  ),
  # Use list() rather than c(): the stanfit objects are S4 objects and
  # list() guarantees they are passed to GetNDivergencies intact.
  "number of divergent transitions" = lapply(
    list(stan_fit_cp, stan_fit_pp_mu, stan_fit_pp_nu, stan_fit_pp_both),
    GetNDivergencies
  ) %>%
    unlist(),
  check.names = FALSE
) %>%
  kbl(
    booktabs = TRUE,
    format = "latex",
    caption = "The number of divergent transitions that occur during sampling.",
    escape = FALSE,
    label = "n_divergent"
  ) %>%
  kable_styling(latex_options = "striped") %>%
  save_kable(
    file = file.path(table_path, "n_divergent.tex"),
    keep_tex = TRUE
  )
6 Cross-validation
In the above sections (Section 2, Section 3, Section 4, and Section 5), all four models appear to reasonably fit the data. They all manage to reclaim the true value of the standard deviation of the measurement error and are equally capable of reclaiming the underlying degradation pathways from the noisy observations. However, when we derive failure time distributions for new units from the different posteriors, the partial pooling models result in large uncertainty bands. Furthermore, the partial pooling models are more uncertain regarding the failure time of units under test. Therefore, we want to choose the model that fits the observed data but correctly quantifies the uncertainty of the underlying latent process so that we can accurately predict the future degradation of new units and the units under test. To do this, we use an objective measure of how well the model is able to predict unseen data using \(elppd\) and cross-validation methods. See the main body of the paper for a description of \(elppd\).
6.1 Leave-one-unit-out
To evaluate the predictive performance of the models for new units, we use leave-one-unit-out cross-validation. We refit the model 10 times, each time withholding one of the units. For each withheld unit, we calculate the likelihood of the withheld observations given the draws of the posterior predictive distribution for a new unit (from the model fitted to the other N-1 units). These values are then used to estimate the log pointwise predictive density (https://arxiv.org/pdf/1507.04544.pdf eq. 3) in order to compare the predictive performance of the different Bayesian models. The below function calculates the \(lppd\) of a withheld observation for a given model.
set.seed(564)

# rvar-aware versions of the base RNG / density functions so that they
# can be applied directly to posterior::rvar objects.
rvar_rnorm <- rfun(rnorm)
rvar_dnorm <- rfun(dnorm)
rvar_rgamma <- rfun(rgamma)

# Leave-one-unit-out cross-validation for a single unit: refit `model`
# with `unit` withheld, then score the withheld unit's observations
# against the posterior predictive distribution of a new unit.
LeaveOneUnitOutCV <- function(unit, model, data) {
  # Training data: every unit except the withheld one.
  held_in <- list(
    I = data$I,
    N = data$N - 1,
    t = data$t,
    y = data$y[, -c(unit)]
  )
  # Observations of the withheld unit; the first observation is not
  # scored.
  held_out <- data$y[2:data$I, unit]

  # Fit the model to the N - 1 retained units.
  fit <- sampling(
    model,
    held_in,
    chains = 6,
    iter = 900,
    warmup = 300,
    seed = 564,
    refresh = 0
  )

  # Posterior predictive draws of the new-unit parameters.
  draws <- fit %>%
    as_draws_rvars() %>%
    spread_rvars(mu_ppd, nu_ppd, sigma)

  # Posterior predictive path of a new unit's latent (noise-free)
  # degradation: cumulative gamma-process increments.
  z_new <- rvar_rgamma(
    data$I - 1,
    shape = diff(data$t) / (draws$nu_ppd^2),
    rate = 1 / (draws$mu_ppd * draws$nu_ppd^2)
  ) %>%
    cumsum()

  # lppd of the withheld observations: y | z, theta ~ N(z, sigma);
  # product over observations, mean over draws, then log.
  lppd <- rvar_dnorm(held_out, z_new, draws$sigma) %>%
    rvar_prod() %>%
    mean() %>%
    log()

  return(lppd)
}
I now repeatedly apply the function to calculate \(lppd\) for each unit in the dataset to get the \(\hbox{elppd}_{LOUO-CV}\) for each model. The results are displayed in Table 5.
Table 5: The ELPPD-LOUO-CV results for each model.
model
LOUO.CV
complete pooling
154.3189
partial pooling mu
153.5409
partial pooling nu
153.2858
partial pooling mu and nu
153.4567
6.2 Step ahead
To evaluate the model’s predictive performance for future degradation of the units currently under test, I use step-ahead cross-validation. I do this by iteratively withholding the last observation for each unit, fitting the model to all the other observations, generating posterior predictive draws for the non-noisy degradation of the withheld observation, and then calculating the \(lppd\) of the withheld observations.
6.2.1 Redefine stan models
The step ahead predictions require a slight re-working of the Stan code since there is no longer an equal number of observations for each unit. Below, I redefine the Stan models so that they can handle one unit with \(I-1\) observations and also produce the posterior predictive draws of the non-noisy degradation for the withheld observation.
// Step-ahead cross-validation model, complete pooling: all units share
// a single (mu, nu) pair. Unit j has one fewer observation (I_j) than
// the other N units (I); its withheld step is predicted in generated
// quantities.
functions {
  // Normal RNG truncated below at 0, via inverse-CDF sampling.
  // NOTE(review): defined but not used anywhere in this program.
  real normal_trunc_rng(real a, real b) {
    real p_lb = normal_cdf(0, a, b);
    real u = uniform_rng(p_lb, 1);
    real y = a + b * inv_Phi(u);
    return y;
  }
}
data{
  int I;            // number of observations for the J-1 retained units
  int I_j;          // number of observations for the withheld j^th unit
  int N;            // number of retained units (J-1)
  vector[I] t;      // the set of observation times
  matrix[I, N] y;   // noisy measurements for the J-1 retained units
  vector[I_j] y_j;  // noisy measurements for the j^th unit
}
transformed data{
  vector[I-1] t_diff;  // differences of the observation times
  t_diff = t[2:I] - t[1:I-1];
}
parameters{
  real<lower = 0> sigma;  // sd of measurement error
  real<lower = 0> mu;     // mean of the gamma process
  // nu = nu_a / sqrt(nu_b): reparameterised coefficient of variation.
  real<lower = 0> nu_a;
  real<lower = 0> nu_b;
  matrix<lower = 0>[I-1, N] z_diff;   // the jumps (process increments)
  vector<lower = 0>[I_j-1] z_diff_j;  // jumps of the withheld unit
}
transformed parameters{
  real<lower = 0> nu;            // CoV of the gamma process
  matrix<lower = 0>[I-1, N] z;   // latent (noise-free) degradation
  vector<lower = 0>[I_j-1] z_j;  // latent degradation of unit j
  nu = nu_a / sqrt(nu_b);
  // Latent paths are the cumulative sums of the positive jumps.
  for(n in 1:N){
    z[,n] = cumulative_sum(z_diff[,n]);
  }
  z_j = cumulative_sum(z_diff_j);
}
model{
  // Priors for the process model. Because mu and nu are scalars,
  // every unit is treated as an observation of the same gamma process
  // (complete pooling).
  mu ~ normal(0.5, 0.2);
  nu_a ~ normal(0, 0.5);
  nu_b ~ gamma((0.5 * 3), (0.5 * 3));
  sigma ~ uniform(0, 10);
  // Process model: gamma-process increments with shape dt/nu^2 and
  // rate 1/(mu*nu^2), so E[dz] = mu*dt.
  for(n in 1:N){
    z_diff[, n] ~ gamma(t_diff / pow(nu, 2), 1 / (mu * pow(nu, 2)));
  }
  z_diff_j ~ gamma(t_diff[1:(I_j - 1)] / pow(nu, 2), 1 / (mu * pow(nu, 2)));
  // Data model: noisy observations around the latent path.
  for (n in 1:N){
    y[2:I, n] ~ normal(z[, n], sigma);
  }
  y_j[2:I_j] ~ normal(z_j, sigma);
}
generated quantities{
  real mu_ppd;
  real nu_ppd;
  real z_diff_j_I_ppd;
  real z_j_I_ppd;
  mu_ppd = mu;
  nu_ppd = nu;
  // One-step-ahead predictive jump for the withheld observation.
  // NOTE(review): 0.1 looks like a hard-coded time step between
  // observations — confirm it matches the spacing of t.
  z_diff_j_I_ppd = gamma_rng(0.1 / pow(nu_ppd, 2), 1 / (mu_ppd * pow(nu_ppd, 2)));
  z_j_I_ppd = z_j[I_j-1] + z_diff_j_I_ppd;
}
// Step-ahead cross-validation model, partial pooling on mu: each unit
// gets its own mean mu_n drawn (non-centred) from a shared
// normal(mu_mean, mu_sd); nu is common to all units.
functions {
  // Normal RNG truncated below at 0, via inverse-CDF sampling.
  // NOTE(review): defined but not used anywhere in this program.
  real normal_trunc_rng(real a, real b) {
    real p_lb = normal_cdf(0, a, b);
    real u = uniform_rng(p_lb, 1);
    real y = a + b * inv_Phi(u);
    return y;
  }
}
data{
  int I;            // number of observations for the J-1 retained units
  int I_j;          // number of observations for the withheld j^th unit
  int N;            // number of retained units (J-1)
  vector[I] t;      // the set of observation times
  matrix[I, N] y;   // noisy measurements for the J-1 retained units
  vector[I_j] y_j;  // noisy measurements for the j^th unit
}
transformed data{
  vector[I-1] t_diff;  // differences of the observation times
  t_diff = t[2:I] - t[1:I-1];
}
parameters{
  real<lower = 0> sigma;    // sd of measurement error
  real<lower = 0> mu_mean;  // mean hyper-parameter of the mu_n
  real<lower = 0> mu_sd;    // sd hyper-parameter of the mu_n
  // Non-centred unit-level means; the lower bound keeps mu_n > 0
  // after the affine transform in transformed parameters.
  vector<lower = ((0 - mu_mean) / mu_sd)>[N] mu_nc;
  real<lower = ((0 - mu_mean) / mu_sd)> mu_nc_j;
  // nu = nu_a / sqrt(nu_b): reparameterised coefficient of variation.
  real<lower = 0> nu_a;
  real<lower = 0> nu_b;
  matrix<lower = 0>[I-1, N] z_diff;   // the jumps (process increments)
  vector<lower = 0>[I_j-1] z_diff_j;  // jumps of the withheld unit
}
transformed parameters{
  vector<lower = 0>[N] mu;       // unit-level means of the gamma process
  real<lower = 0> mu_j;          // mean for the withheld unit
  real<lower = 0> nu;            // coefficient of variation (shared)
  matrix<lower = 0>[I-1, N] z;   // latent (noise-free) degradation
  vector<lower = 0>[I_j-1] z_j;  // latent degradation of unit j
  // Non-centred parameterisation: mu_n = mu_nc_n * mu_sd + mu_mean.
  for (n in 1:N){
    mu[n] = (mu_nc[n] * mu_sd) + mu_mean;
  }
  mu_j = (mu_nc_j * mu_sd) + mu_mean;
  nu = nu_a / sqrt(nu_b);
  // The increments are sampled on a unit-mean scale (see model block)
  // and rescaled by mu_n here before accumulating.
  for(n in 1:N){
    z[,n] = cumulative_sum(z_diff[,n] * mu[n]);
  }
  z_j = cumulative_sum(z_diff_j * mu_j);
}
model{
  // Priors for the process model.
  mu_mean ~ normal(0.5, 0.2);
  mu_sd ~ cauchy(0, 1);
  mu_nc ~ normal(0, 1);
  mu_nc_j ~ normal(0, 1);
  nu_a ~ normal(0, 1);
  nu_b ~ gamma((0.5 * 3), (0.5 * 3));
  sigma ~ uniform(0, 10);
  // Process model: mu-free rate because the mu_n scaling is applied
  // to the increments in transformed parameters.
  for(n in 1:N){
    z_diff[, n] ~ gamma(t_diff / pow(nu, 2), 1 / (pow(nu, 2)));
  }
  z_diff_j ~ gamma(t_diff[1:(I_j - 1)] / pow(nu, 2), 1 / (pow(nu, 2)));
  // Data model: noisy observations around the latent path.
  for (n in 1:N){
    y[2:I, n] ~ normal(z[, n], sigma);
  }
  y_j[2:I_j] ~ normal(z_j, sigma);
}
generated quantities{
  real mu_ppd;
  real nu_ppd;
  real z_diff_j_I_ppd;
  real z_j_I_ppd;
  mu_ppd = mu_j;
  nu_ppd = nu;
  // One-step-ahead predictive jump for the withheld observation.
  // NOTE(review): 0.1 looks like a hard-coded time step between
  // observations — confirm it matches the spacing of t.
  z_diff_j_I_ppd = gamma_rng(0.1 / pow(nu_ppd, 2), 1 / (mu_ppd * pow(nu_ppd, 2)));
  z_j_I_ppd = z_j[I_j-1] + z_diff_j_I_ppd;
}
// Step-ahead cross-validation model, partial pooling on nu: each unit
// gets its own coefficient of variation nu_n drawn (non-centred) from
// a shared normal(nu_mean, nu_sd); mu is common to all units.
functions {
  // Normal RNG truncated below at 0, via inverse-CDF sampling.
  // NOTE(review): defined but not used anywhere in this program.
  real normal_trunc_rng(real a, real b) {
    real p_lb = normal_cdf(0, a, b);
    real u = uniform_rng(p_lb, 1);
    real y = a + b * inv_Phi(u);
    return y;
  }
}
data{
  int I;            // number of observations for the J-1 retained units
  int I_j;          // number of observations for the withheld j^th unit
  int N;            // number of retained units (J-1)
  vector[I] t;      // the set of observation times
  matrix[I, N] y;   // noisy measurements for the J-1 retained units
  vector[I_j] y_j;  // noisy measurements for the j^th unit
}
transformed data{
  vector[I-1] t_diff;  // differences of the observation times
  t_diff = t[2:I] - t[1:I-1];
}
parameters{
  real<lower = 0> sigma;   // sd of measurement error
  real<lower = 0> mu;      // mean of the gamma process (shared)
  // nu_mean = nu_a / sqrt(nu_b): reparameterised hyper-mean of nu_n.
  real<lower = 0> nu_a;
  real<lower = 0> nu_b;
  real<lower = 0> nu_sd;   // sd hyper-parameter of the nu_n
  // Non-centred unit-level CoVs; the lower bound keeps nu_n > 0
  // after the affine transform in transformed parameters.
  vector<lower = ((0 - (nu_a / sqrt(nu_b))) / nu_sd)>[N] nu_nc;
  real<lower = ((0 - (nu_a / sqrt(nu_b))) / nu_sd)> nu_nc_j;
  matrix<lower = 0>[I-1, N] z_diff;   // the jumps (process increments)
  vector<lower = 0>[I_j-1] z_diff_j;  // jumps of the withheld unit
}
transformed parameters{
  real<lower = 0> nu_mean;       // mean hyper-parameter of the nu_n
  vector<lower = 0>[N] nu;       // unit-level coefficients of variation
  real<lower = 0> nu_j;          // CoV for the withheld unit
  matrix<lower = 0>[I-1, N] z;   // latent (noise-free) degradation
  vector<lower = 0>[I_j-1] z_j;  // latent degradation of unit j
  nu_mean = nu_a / sqrt(nu_b);
  // Non-centred parameterisation: nu_n = nu_nc_n * nu_sd + nu_mean.
  for (n in 1:N){
    nu[n] = (nu_nc[n] * nu_sd) + nu_mean;
  }
  nu_j = (nu_nc_j * nu_sd) + nu_mean;
  // The increments are sampled on a unit-mean scale (see model block)
  // and rescaled by the shared mu here before accumulating.
  for(n in 1:N){
    z[,n] = cumulative_sum(z_diff[,n] * mu);
  }
  z_j = cumulative_sum(z_diff_j * mu);
}
model{
  // Priors for the process model.
  mu ~ normal(0.5, 0.2);
  nu_a ~ normal(0, 1);
  nu_b ~ gamma((0.5 * 3), (0.5 * 3));
  nu_sd ~ cauchy(0, 0.1);
  nu_nc ~ normal(0, 1);
  nu_nc_j ~ normal(0, 1);
  sigma ~ uniform(0, 10);
  // Process model: mu-free rate because the mu scaling is applied to
  // the increments in transformed parameters.
  for(n in 1:N){
    z_diff[, n] ~ gamma(t_diff / pow(nu[n], 2), 1 / (pow(nu[n], 2)));
  }
  z_diff_j ~ gamma(t_diff[1:(I_j - 1)] / pow(nu_j, 2), 1 / (pow(nu_j, 2)));
  // Data model: noisy observations around the latent path.
  for (n in 1:N){
    y[2:I, n] ~ normal(z[, n], sigma);
  }
  y_j[2:I_j] ~ normal(z_j, sigma);
}
generated quantities{
  real mu_ppd;
  real nu_ppd;
  real z_diff_j_I_ppd;
  real z_j_I_ppd;
  mu_ppd = mu;
  nu_ppd = nu_j;
  // One-step-ahead predictive jump for the withheld observation.
  // NOTE(review): 0.1 looks like a hard-coded time step between
  // observations — confirm it matches the spacing of t.
  z_diff_j_I_ppd = gamma_rng(0.1 / pow(nu_ppd, 2), 1 / (mu_ppd * pow(nu_ppd, 2)));
  z_j_I_ppd = z_j[I_j-1] + z_diff_j_I_ppd;
}
// Step-ahead cross-validation model, partial pooling on both mu and
// nu: each unit gets its own mean mu_n and coefficient of variation
// nu_n, both drawn (non-centred) from shared hyper-distributions.
functions {
  // Normal RNG truncated below at 0, via inverse-CDF sampling.
  // NOTE(review): defined but not used anywhere in this program.
  real normal_trunc_rng(real a, real b) {
    real p_lb = normal_cdf(0, a, b);
    real u = uniform_rng(p_lb, 1);
    real y = a + b * inv_Phi(u);
    return y;
  }
}
data{
  int I;            // number of observations for the J-1 retained units
  int I_j;          // number of observations for the withheld j^th unit
  int N;            // number of retained units (J-1)
  vector[I] t;      // the set of observation times
  matrix[I, N] y;   // noisy measurements for the J-1 retained units
  vector[I_j] y_j;  // noisy measurements for the j^th unit
}
transformed data{
  vector[I-1] t_diff;  // differences of the observation times
  t_diff = t[2:I] - t[1:I-1];
}
parameters{
  real<lower = 0> sigma;    // sd of measurement error
  real<lower = 0> mu_mean;  // mean hyper-parameter of the mu_n
  real<lower = 0> mu_sd;    // sd hyper-parameter of the mu_n
  // Non-centred unit-level means; lower bounds keep mu_n > 0.
  vector<lower = ((0 - mu_mean) / mu_sd)>[N] mu_nc;
  real<lower = ((0 - mu_mean) / mu_sd)> mu_nc_j;
  // nu_mean = nu_a / sqrt(nu_b): reparameterised hyper-mean of nu_n.
  real<lower = 0> nu_a;
  real<lower = 0> nu_b;
  real<lower = 0> nu_sd;    // sd hyper-parameter of the nu_n
  // Non-centred unit-level CoVs; lower bounds keep nu_n > 0.
  vector<lower = ((0 - (nu_a / sqrt(nu_b))) / nu_sd)>[N] nu_nc;
  real<lower = ((0 - (nu_a / sqrt(nu_b))) / nu_sd)> nu_nc_j;
  matrix<lower = 0>[I-1, N] z_diff;   // the jumps (process increments)
  vector<lower = 0>[I_j-1] z_diff_j;  // jumps of the withheld unit
}
transformed parameters{
  vector<lower=0>[N] nu;         // unit-level coefficients of variation
  real<lower=0> nu_j;            // CoV for the withheld unit
  vector<lower=0>[N] mu;         // unit-level means of the gamma process
  real<lower=0> mu_j;            // mean for the withheld unit
  real<lower = 0> nu_mean;       // mean hyper-parameter of the nu_n
  matrix<lower = 0>[I-1, N] z;   // latent (noise-free) degradation
  vector<lower = 0>[I_j-1] z_j;  // latent degradation of unit j
  nu_mean = nu_a / sqrt(nu_b);
  // Non-centred parameterisation for both unit-level parameters.
  for (n in 1:N) {
    nu[n] = (nu_nc[n] * nu_sd) + nu_mean;
    mu[n] = (mu_nc[n] * mu_sd) + mu_mean;
  }
  nu_j = (nu_nc_j * nu_sd) + nu_mean;
  mu_j = (mu_nc_j * mu_sd) + mu_mean;
  // Latent paths are the cumulative sums of the positive jumps
  // (mu_n enters the increment distribution directly, in the model
  // block, rather than via rescaling as in the single-parameter
  // pooling models).
  for(n in 1:N){
    z[,n] = cumulative_sum(z_diff[,n]);
  }
  z_j = cumulative_sum(z_diff_j);
}
model{
  // Priors for the process model.
  mu_mean ~ normal(0.5, 0.2);
  mu_sd ~ cauchy(0, 1);
  mu_nc ~ normal(0, 1);
  mu_nc_j ~ normal(0, 1);
  nu_a ~ normal(0, 0.5);
  nu_b ~ gamma((0.5 * 3), (0.5 * 3));
  nu_sd ~ cauchy(0, 0.1);
  nu_nc ~ normal(0, 1);
  nu_nc_j ~ normal(0, 1);
  sigma ~ uniform(0, 10);
  // Process model: gamma-process increments with unit-specific shape
  // dt/nu_n^2 and rate 1/(mu_n*nu_n^2), so E[dz] = mu_n*dt.
  for(n in 1:N){
    z_diff[, n] ~ gamma(t_diff / pow(nu[n], 2), 1 / (mu[n] * pow(nu[n], 2)));
  }
  z_diff_j ~ gamma(t_diff[1:(I_j - 1)] / pow(nu_j, 2), 1 / (mu_j * pow(nu_j, 2)));
  // Data model: noisy observations around the latent path.
  for (n in 1:N){
    y[2:I, n] ~ normal(z[, n], sigma);
  }
  y_j[2:I_j] ~ normal(z_j, sigma);
}
generated quantities{
  real mu_ppd;
  real nu_ppd;
  real z_diff_j_I_ppd;
  real z_j_I_ppd;
  mu_ppd = mu_j;
  nu_ppd = nu_j;
  // One-step-ahead predictive jump for the withheld observation.
  // NOTE(review): 0.1 looks like a hard-coded time step between
  // observations — confirm it matches the spacing of t.
  z_diff_j_I_ppd = gamma_rng(0.1 / pow(nu_ppd, 2), 1 / (mu_ppd * pow(nu_ppd, 2)));
  z_j_I_ppd = z_j[I_j-1] + z_diff_j_I_ppd;
}